rm(list=ls(all=TRUE))
dev.off()
# Created using R version 3.3.2 under OS X Yosemite 10.10.5
library(plm) # Created using version 1.6-5
library(dplyr) # Created using version 0.5.0
library(tidyr) # Created using version 0.6.1
library(lme4) # Created using version 1.1-12
library(lfe) # Created using version 2.5-1998
library(reshape2) # Created using version 1.4.2
library(stargazer) # Created using version 5.2
library(ggplot2) # Created using version 2.2.1
library(xtable) # Created using version 1.8-2
library(Matrix) # Created using version 1.2-7.1
library(Formula) # Created using version 1.2-1

# NOTE: Set working directory
setwd("~/Downloads/rep")

# File "cooperman_ri.R" creates the outputs loaded here and included in the replication folder.

# Load Data --------
load("cooperman_dataset.Rdata") 
load("ate_sims.Rdata")
#load("ate_sims_rep.Rdata") # Uncomment if not using the downloaded file.

names(dataset)
counties.onlygomez <- unique(dataset$FIPS.County[is.na(dataset$Rainfall12)])
# Subset data to drop 7 counties in Gomez et al. (2007) dataset that do not have out-of-sample rainfall data
data2 <- dataset[!is.na(dataset$Rainfall12),]
data2 <- data2 %>% arrange(FIPS.County,Year)

# Table 1 ------
# Summary statistics
sumstats <- c("Turnout", "Rain", "Rainfall12", "Rainfall100", "Rainfall12.index", "Rainfall12.spi", "Snow", "PcntBlack", "ZPcntHSGrad", "FarmsPerCap", "AdjIncome", "Closing", "Property", "Literacy", "PollTax", "Motor", "GubElection", "SenElection")
sumstats.labels <- c("Voter Turnout", "Rain (in) from Gomez et al. (2007)", "Rain (in) from Author - Kriging 12","Rain (in) from Author - Kriging 100","Rain Index from Author - Kriging 12", "Rain SPI from Author - Kriging 12",  "Snow (in)", "$%$ African American", "$%$ High School Graduates", "Farms per Capita", "Median HH Income", "Closed Days Pre-Election", "Property Requirement Vote Law", "Literacy Vote Law", "Poll Tax Vote Law", "Motor Voter", "Gubernatorial Elec", "Senatorial Elec")
data2.turnout <- subset(data2, !is.na(Turnout))
cat(stargazer(data2.turnout[,sumstats], digits=2,covariate.labels = sumstats.labels, align=T, float=F), file="table1.tex", sep="\n")

# Table 2 ------
# Replication of Gomez et al. (2007) results, covariates omitted

counties.onlygomez <- unique(dataset$FIPS.County[is.na(dataset$Rainfall12)])
rep1 <- lmer(Turnout ~ Rain + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)
rep2 <- lmer(Turnout ~ Rain + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset, subset= !(FIPS.County %in% counties.onlygomez))
rep3 <- lmer(Turnout ~ Rainfall12 + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)
rep4 <- lmer(Turnout ~ Rainfall12.index + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)
rep5 <- lmer(Turnout ~ Rainfall12.spi + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=dataset)

covs.exclude <- c("Year","Snow","PcntBlack", "ZPcntHSGrad", "FarmsPerCap", "AdjIncome", "Closing", "Property", "Literacy", "PollTax", "Motor", "GubElection", "SenElection", "Turnout.Lag")
covlabs.exclude <- c("Rain (inches) - Gomez et al. (2007)", "Rain (inches) - Author", "Rain Index - Author","Rain SPI - Author","Intercept")
cat(stargazer(rep1, rep2, rep3, rep4, rep5, add.lines = list(c("Sample", "Original", "Limited", "Limited", "Limited", "Limited")), no.space=T, omit=covs.exclude, omit.stat = c("rsq", "f"),float=F, dep.var.labels = "Voter Turnout", align=T, covariate.labels = covlabs.exclude), file="table2.tex", sep="\n")

# Table 3 and Figure 4-------
# Calculate RI p-values

gomez.model.inch <- lmer(Turnout ~ Rainfall12 + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs <- fixef(gomez.model.inch)["Rainfall12"]
gomez.model.index <- lmer(Turnout ~ Rainfall12.index + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs.index <- fixef(gomez.model.index)["Rainfall12.index"]
gomez.model.spi <- lmer(Turnout ~ Rainfall12.spi + Snow  + ZPcntHSGrad + AdjIncome + PcntBlack + FarmsPerCap + Closing + Motor + Property + Literacy + PollTax  + GubElection + SenElection + Turnout.Lag + (1|FIPS.County) + as.factor(Year), data=data2)
ate.obs.spi <- fixef(gomez.model.spi)["Rainfall12.spi"]

two.tail.US.spi <- sum(abs(ate$ate.sims.US.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.US.spi)
two.tail.reg.spi <- sum(abs(ate$ate.sims.reg.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.reg.spi)
two.tail.state.spi <- sum(abs(ate$ate.sims.state.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.state.spi)
two.tail.cwa.spi <- sum(abs(ate$ate.sims.cwa.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.cwa.spi)
two.tail.indep.spi <- sum(abs(ate$ate.sims.indep.spi) >= abs(ate.obs.spi))/length(ate$ate.sims.indep.spi)

two.tail.US.index <- sum(abs(ate$ate.sims.US.index) >= abs(ate.obs.index))/length(ate$ate.sims.US.index)
two.tail.reg.index <- sum(abs(ate$ate.sims.reg.index) >= abs(ate.obs.index))/length(ate$ate.sims.reg.index)
two.tail.state.index <- sum(abs(ate$ate.sims.state.index) >= abs(ate.obs.index))/length(ate$ate.sims.state.index)
two.tail.cwa.index <- sum(abs(ate$ate.sims.cwa.index) >= abs(ate.obs.index))/length(ate$ate.sims.cwa.index)
two.tail.indep.index <- sum(abs(ate$ate.sims.indep.index) >= abs(ate.obs.index))/length(ate$ate.sims.indep.index)

two.tail.US <- sum(abs(ate$ate.sims.US) >= abs(ate.obs))/length(ate$ate.sims.US)
two.tail.reg <- sum(abs(ate$ate.sims.reg) >= abs(ate.obs))/length(ate$ate.sims.reg)
two.tail.state <- sum(abs(ate$ate.sims.state) >= abs(ate.obs))/length(ate$ate.sims.state)
two.tail.cwa <- sum(abs(ate$ate.sims.cwa) >= abs(ate.obs))/length(ate$ate.sims.cwa)
two.tail.indep <- sum(abs(ate$ate.sims.indep) >= abs(ate.obs))/length(ate$ate.sims.indep)

# Create Table 3
values <- data.frame(t(as.matrix(c("Rainfall (in)", round(ate.obs,3), two.tail.indep, two.tail.cwa, two.tail.state, two.tail.reg, two.tail.US))), stringsAsFactors = F)
values.index <- data.frame(t(as.matrix(c("Rainfall Index", round(ate.obs.index,3), two.tail.indep.index, two.tail.cwa.index, two.tail.state.index, two.tail.reg.index, two.tail.US.index))), stringsAsFactors = F)
values.spi <- data.frame(t(as.matrix(c("Rainfall SPI", round(ate.obs.spi,3), two.tail.indep.spi, two.tail.cwa.spi, two.tail.state.spi, two.tail.reg.spi, two.tail.US.spi))), stringsAsFactors = F)
colnames(values) <- colnames(values.index) <- colnames(values.spi) <- c("Var", "Estimated ATE", "County", "County Warning Area", "State", "Weather Region", "US")
table <- rbind(values, values.index, values.spi)
table[table=="0"]<- "<.001"
colnames(table) <-  c("Var", "Estimated ATE", "County", "County Warning Area", "State", "Weather Region", "US")
cat(print.xtable(xtable(table,digits=3), floating=F, include.rownames=F), file="table3.tex", sep="\n")

# Plot sampling distributions for Figure 4
treat_meas <- c(rep("Rainfall (inches)", 5000), rep("Rainfall Index", 5000), rep("Rainfall SPI", 5000))
cluster_lev <- rep(c(rep("County", 1000), rep("County Warning Area", 1000), rep("State", 1000), rep("Weather Region", 1000), rep("US", 1000)), 3)
null_ests <- c(ate$ate.sims.indep, ate$ate.sims.cwa, ate$ate.sims.state, ate$ate.sims.reg, ate$ate.sims.US, ate$ate.sims.indep.index,ate$ate.sims.cwa.index, ate$ate.sims.state.index, ate$ate.sims.reg.index, ate$ate.sims.US.index, ate$ate.sims.indep.spi,ate$ate.sims.cwa.spi, ate$ate.sims.state.spi,ate$ate.sims.reg.spi, ate$ate.sims.US.spi)               
ates <- c(rep(ate.obs, 5000), rep(ate.obs.index, 5000), rep(ate.obs.spi,5000))    
df <- data.frame(cbind(null_ests, cluster_lev, treat_meas, ates), stringsAsFactors = F)

df$null_ests <- as.numeric(df$null_ests)
df$cluster_lev <- factor(df$cluster_lev, levels=c("County", "County Warning Area", "State", "Weather Region", "US"))
df$treat_meas <- as.factor(df$treat_meas)
df$ates <- as.numeric(df$ates)
levels(df$cluster_lev)
pdf(file = "figure4.pdf", onefile=FALSE,paper="special",width=8,height=8)
par(mar=c(5.1, 5.1, 4.1, 2.1))
P  <- ggplot(df, aes(x=null_ests)) + geom_histogram(bins=60) + xlim(-2,2) + facet_grid(cluster_lev~treat_meas, scales="free_y") + theme(strip.text.y = element_text(size = 8, colour = "red", angle = 90))
P + geom_vline(aes(xintercept=ates), colour="red", df) + theme_bw() + xlab("ATE Estimates Under Sharp Null Hypothesis") + ylab("Count")
dev.off()


